import urllib.request

from bs4 import BeautifulSoup

import chardet


def __getHtml(url):
    """
    Private helper: fetch the raw HTML of a web page and decode it to text.

    :param url: URL of the page to fetch
    :return: decoded HTML text (bytes that cannot be decoded are ignored)
    """
    # Context manager guarantees the underlying socket is closed even if
    # read() raises (the original leaked the response object).
    with urllib.request.urlopen(url) as response:
        html = response.read()

    # chardet may fail to guess and return None for "encoding", which would
    # make bytes.decode() raise TypeError — fall back to UTF-8 in that case.
    charset = chardet.detect(html).get("encoding") or "utf-8"
    return html.decode(charset, errors='ignore')


def __sloveHtml(htmlText):
    """
    Private helper: parse the page HTML and extract the sentence list.

    :param htmlText: HTML text of the page
    :return: list of extracted sentences; empty if the expected
             ``<div class="content">`` node is missing
    """
    sentenceList = list()

    soup = BeautifulSoup(htmlText, "html.parser")
    # Locate the <div class="content"> node that holds the sentences.
    div_node = soup.find('div', class_='content')
    if div_node is None:
        # Page layout changed or the download was empty — nothing to parse
        # (the original crashed with TypeError here).
        return sentenceList

    # Strip the <u> tags in place; a plain loop, not a throwaway list
    # comprehension, since we only want the side effect.
    for u_tag in div_node('u'):
        u_tag.extract()

    # Each sentence lives in a <p> node formatted like "1、text…":
    # split on the first '、' and keep the part after the numbering.
    for p_node in div_node.find_all('p'):
        parts = p_node.get_text().split("、", 1)
        if len(parts) > 1:
            # Skip paragraphs without the delimiter instead of raising
            # IndexError as the original did.
            sentenceList.append(parts[1])

    return sentenceList


def getSentenceList(url):
    """
    Public Spider API: download the page at *url* and return its sentences.

    :param url: address of the page containing the sentences
    :return: list of sentences extracted from the page
    """
    htmlText = __getHtml(url)
    return __sloveHtml(htmlText)
